#!/usr/bin/perl -w
# Bug reports and comments to Igor.Ermolaev@intel.com
use strict;
no warnings 'portable';
use File::Spec;
use Cwd 'realpath';

use constant { FALSE=>0, TRUE=>-1 };

my @symbols_files = ();
my $distrib_file  = undef;

my @modules      = ();
my @symbols      = ();
my %bases        = ();
my $ctb_rte      = 1;
my $ccl_rte      = undef;
my @profile      = ();
my $total_instrs = 0;
my $total_cycles = 0;
my $tid          = undef;
my $blocks_req   = FALSE;
my $max_adj      = undef;
my $ccpi         = undef;
my $tscale       = undef;
my $tavg_instrs  = undef;
my $grps_ovd     = FALSE;
my $grps_cnt     = 7;
my $grps_ref     = 5;

sub resize_set
{
   my $ref = shift;
   my $def = shift;
   my $idx = shift;
   my $ldx = scalar @{$ref};for($ldx..$idx){ push @{$ref}, $def; }
   $$ref[$idx] = shift;
}

sub resize_add
{
   my $ref = shift;
   my $def = shift;
   my $idx = shift;
   my $ldx = scalar @{$ref};for($ldx..$idx){ push @{$ref}, $def; }
   $$ref[$idx] += shift;
}

if( $#ARGV >= 1 && $ARGV[0] eq '-ts' )
{
   $tscale = 1.0*$ARGV[1];
   shift @ARGV;shift @ARGV;
}
if( $#ARGV >= 1 && $ARGV[0] eq '-ti' )
{
   $tavg_instrs = int $ARGV[1];
   shift @ARGV;shift @ARGV;
}
if( $#ARGV >= 1 && $ARGV[0] eq '-tid' )
{
   $tid = int $ARGV[1];
   shift @ARGV;shift @ARGV;
}
if( $#ARGV >= 0 && $ARGV[0] eq '-d' )
{
   $blocks_req = TRUE;
   shift @ARGV;
}
if( $#ARGV >= 1 && $ARGV[0] eq '-g' )
{
   if( $ARGV[1] =~ /^(\d+)\:(\d+)$/o ){ $grps_ovd=TRUE;$grps_cnt=int $1;$grps_ref=int $2; }
   shift @ARGV;shift @ARGV;
}
if( $#ARGV >= 1 && $ARGV[0] eq '-a' )
{
   $max_adj = 1.0*$ARGV[1];
   shift @ARGV;shift @ARGV;
}
if( $#ARGV >= 1 && $ARGV[0] eq '-c' )
{
   $ccpi = $ARGV[1];
   shift @ARGV;shift @ARGV;
}

my $j = 0;
if( $#ARGV >= 0 )
{
   my $i = 0;
   my $end_sep = undef;
   if( $ARGV[$j] eq '-s' ){ $j++;$end_sep='-s'; }
   while( $j <= $#ARGV )
   {
      if( defined $end_sep && $ARGV[$j] eq $end_sep ){ $j++;last; }
      push @symbols_files, $ARGV[$j];
      my @lsymbols = ();
      push @modules, [];
      my $module_idx = undef;
      my $symbol_idx = undef;
      open SYMS, "<$ARGV[$j]" or die;
      while( <SYMS> )
      {
         if( /\s*\<\s*(module|symbol)(\s+|^)/io )
         {
            if( lc $1 eq 'module' )
            {
               $symbol_idx = undef;
               push @{$modules[$i]}, {};
               $module_idx = $#{$modules[$i]};
            }
            elsif( lc $1 eq 'symbol' )
            {
               push @lsymbols, { module=>$module_idx, count=>0, instrs=>0, cycles=>0, ninstrs=>0.0 };
               $symbol_idx = $#lsymbols;
            }
         }
         if( /\s+name\s*=\s*['"]([^'"]+)['"]/io )
         {
               $lsymbols[$symbol_idx]{name} = $1 if defined $symbol_idx;
            $modules[$i][$module_idx]{name} = $1 if defined $module_idx && !defined $symbol_idx;
         }
         if( /\s+base\s*=\s*['"]((0x)?[[:xdigit:]]+)['"]/io )
         {
               $lsymbols[$symbol_idx]{base} = hex $1 if defined $symbol_idx;
            $modules[$i][$module_idx]{base} = hex $1 if defined $module_idx && !defined $symbol_idx;
         }
         if( /\s+path\s*=\s*['"]([^'"]+)['"]/io )
         {
            $modules[$i][$module_idx]{path} = $1 if defined $module_idx;
         }
      }
      close SYMS;
      @lsymbols = sort { $a->{base} <=> $b->{base} } @lsymbols;
      push @symbols, \@lsymbols;
      for(0..$#{$modules[$i]})
      {
         if( exists $modules[$i][$_]{base} )
         {
            my %base = ( base=>$modules[$i][$_]{base}, mid=>$_ );
            if( exists $modules[$i][$_]{path} )
            {
               my $path = $modules[$i][$_]{path};
               my ( $vol, $dir, $name ) = File::Spec->splitpath( $path );
               $bases{$path} = [] if !exists $bases{$path};resize_set $bases{$path}, undef, $i, \%base;
               $bases{$name} = [] if !exists $bases{$name};resize_set $bases{$name}, undef, $i, \%base;
               my $link = realpath( $path );
               if( defined $link && $link ne $path )
               {
                  my ( $vol, $dir, $name ) = File::Spec->splitpath( $link );
                  $bases{$link} = [] if !exists $bases{$link};resize_set $bases{$link}, undef, $i, \%base;
                  $bases{$name} = [] if !exists $bases{$name};resize_set $bases{$name}, undef, $i, \%base;
               }
            }
            elsif( exists $modules[$i][$_]{name} )
            { 
               my $name = $modules[$i][$_]{name};
               $bases{$name} = [] if !exists $bases{$name};resize_set $bases{$name}, undef, $i, \%base; 
            }
         }
      }
      $j++;
      last if !defined $end_sep;
      $i++;
   }
}
if( $#ARGV >= $j )
{
   my $cur_mdl   = undef;
   my $prf_base  = undef;
   my $cur_bases = undef;
   $distrib_file = $ARGV[$j];
   open PROF, "<$ARGV[$j]" or die;
   $j++;
   while( <PROF> )
   {
      if( /^#(\d+)(\s(\d+))?$/o )
      {
         $ctb_rte = int $1;
         $ccl_rte = int $3 if defined $3;
      }
      elsif( /^#(.*)\:([[:xdigit:]]+)$/o )
      {
         $prf_base = hex $2;
         if( !defined $1 )
         {
            $cur_mdl   = '';
            $cur_bases = undef;
         }
         elsif( exists $bases{$1} )
         {
            $cur_mdl   = $1;
            $cur_bases = $bases{$1};
         }
         else
         {
            $cur_mdl = undef;
            my ( $vol, $dir, $name ) = File::Spec->splitpath( $1 );
            if( exists $bases{$name} ){ $cur_mdl = $name; }
            else
            {
               my $link = realpath( $1 );
               if( defined $link && $link ne $1 )
               {
                   my ( $vol, $dir, $name ) = File::Spec->splitpath( $link );
                   $cur_mdl = $name if exists $bases{$name};
               }
            }
            $cur_bases = defined $cur_mdl ? $bases{$cur_mdl} : undef;
            $prf_base  = undef if !defined $cur_bases;
         }
         if( defined $prf_base && defined $cur_bases ){ for(0..$#{$cur_bases}){ $modules[$_][$$cur_bases[$_]{mid}]{prf_base} = $prf_base if defined $$cur_bases[$_]; } }
      }
      elsif( /^([[:xdigit:]]+)\s(\d+)\s(\d+)(\s(\d+))?$/o )
      {
         next if !defined $prf_base;
         $total_instrs+= int $3;
         if( defined $5 )
         {
            push @profile, { active=>TRUE, addr=>$prf_base+hex $1, size=>int $2, offs=>hex $1, instrs=>int $3, cycles=>int $5, hits=>0, imixes=>[], weights=>[], ibases=>$cur_bases };
            $total_cycles+= int $5;
         }
         else
         {
            push @profile, { active=>TRUE, addr=>$prf_base+hex $1, size=>int $2, offs=>hex $1, instrs=>int $3,               , hits=>0, imixes=>[], weights=>[], ibases=>$cur_bases };
         }
      }
   }
   close PROF;
   @profile = sort { $a->{addr} <=> $b->{addr} } @profile;
}

sub bin_search
{
   my $val = shift;
   my $key = shift;
   my $ref = shift;

   my $min_idx = 0;
   my $max_idx = $#{$ref} + 1;
   my $idx     = 0;
   while( $min_idx != $max_idx )
   {
      $idx = $min_idx + ( ( $max_idx - $min_idx ) >> 1 );
      if( $ref->[$idx]{$key} == $val ){ $min_idx = $idx + 1;last; }
      if( $ref->[$idx]{$key} <  $val ){ $min_idx = $idx + 1;      }
      else                            { $max_idx = $idx;          }
   }
   return $min_idx;
}

my @totals       = ();
my @matcheds     = ();
my @ignored      = ();
my $count        = undef;
my $cur_tid      = undef;
my $cur_blk      = undef;
my %blocks       = ();
my @info         = ();
my $weights_rqrd = FALSE;
my @lcpis        = ();
my @quals        = ();
my @imixes       = @ARGV;
splice @imixes, 0, $j;
if( @imixes ){ $weights_rqrd = TRUE; }
else         { @imixes = ( '-' );    }
@imixes = map{ $_ eq '-' ? '<-' : $_ } @imixes;
(scalar @symbols_files) == (scalar @imixes) or die;
for my $i (0..$#imixes)
{
   if( $weights_rqrd == TRUE && @symbols_files && defined $distrib_file )
   {
      if( $imixes[$i] eq '<-' )
      {
         push @quals, ( scalar @imixes == 1 ? 1.0 : undef );
         push @lcpis, undef if defined $ccpi;
      }
      else
      {
         my @recs = ();
         my $grps = $grps_ovd == TRUE ? "-g $grps_cnt:$grps_ref " : '';
         if( defined $ccpi )
         {
            @recs = split /\n/, `$0 $grps$symbols_files[$i] $distrib_file $imixes[$i]|grep -m 2 "^$ccpi\\|^Quality"`;
         }
         elsif( scalar @imixes != 1.0 )
         {
            @recs = split /\n/, `$0 $grps$symbols_files[$i] $distrib_file $imixes[$i]|grep -m 1 '^Quality'`;
         }
         if( defined $ccpi )
         {
             my $res = '';
             if( $ccpi ne 'sCPI' ){ $res = $recs[0] if @recs; }
             else
             {
                my ( $vol, $dir, $name ) = File::Spec->splitpath( $imixes[$i] );
                my $fname = File::Spec->catpath( $vol, $dir, 'groups.txt' );
                $res = `grep -m 1 "^$ccpi" "$fname"`;
             }
             if( $res =~ /.*CPI\s*\:\s*(\d+(\.\d+)?)/io ){ push @lcpis, 1.0*$1; }else{ push @lcpis, undef; }
          }
          if( @recs && $recs[-1] =~ /Quality\s*\:\s*(\d+(\.\d+)?)/io ){ push @quals, $1/100.0; }else{ push @quals, ( scalar @imixes == 1 ? 1.0 : undef ); }
      }
   }
   my @lmodules = ();for(0..$#{$modules[$i]}){ push @lmodules, { base=>$modules[$i][$_]{base}, mid=>$_ } }
      @lmodules = sort{ $a->{base} <=> $b->{base} } @lmodules;
   push @info    , [];
   push @totals  , 0;
   push @matcheds, 0;
   push @ignored , 0;
   my $seen_global = FALSE;
   my $sub_total   = 0;
   my $percent     = 0.0;
   open IMIX, $imixes[$i] or die;
   while( <IMIX> )
   {
      if( /\s*EMIT_TOP_BLOCK_STATS\s+FOR\s+TID\s+(\d+)\s+(OS-TID\s+(\d+)\s+)?EMIT\s+/io )
      {
         $cur_tid = int $1;
      }
      elsif( /\s*END_TOP_BLOCK_STATS\s*/io )
      {
         $cur_tid = undef;
         $cur_blk = undef;
      }
      elsif( /\sPC\s*\:\s*0*([[:xdigit:]]+)\s*ICOUNT\s*\:\s*([0-9,]+)\s*EXECUTIONS\s*\:\s*([0-9,]+)\s*#BYTES\s*\:\s*(\d+)\s.*\s+cumltv\%\s*\:\s+(\d+(\.\d+)?)(\s+|^)/io )
      {
         next if defined $tid && ( !defined $cur_tid || $cur_tid != $tid );
         my $addr   = lc $1;
         my $size   = int $4;
         my $it     = $2;
         my $ct     = $3;
         my $p      = 1.0*$5;
         $it =~ tr/,//d;$ct =~ tr/,//d;
         my $instrs = defined $tscale ? int(0.5+$tscale*$it) : int $it;
         $sub_total+= $instrs;
         $count     = defined $tscale ? int(0.5+$tscale*$ct) : int $ct;
         $percent   = $p if $p > $percent;
         if( exists $blocks{$addr} ){ $cur_blk = $blocks{$addr}; }
         else
         {
            push @{$info[$i]}, { addr=>$addr, size=>$size, instrs=>0, active=>TRUE };
            $cur_blk       = $#{$info[$i]};
            $blocks{$addr} = $cur_blk;
         }
         $info[$i][$cur_blk]{instrs}+= $instrs;
      }
      elsif( /\s*XDIS\s+((0x)?[[:xdigit:]]+)\s*\:(\s*\w+\s*[[:xdigit:]]+\s*(\w+))?/io )
      {
         my $instr = lc $4;
         next if !@{$symbols[$i]} || defined $tid && ( !defined $cur_tid || $cur_tid != $tid );
         my  $addr = hex $1;
         next if $addr > 0x7fff00000000;

         my $active = TRUE;
         if( $instr =~ /lock|pause/o || $instr eq 'rep' )
         {
            $active = FALSE;
            $info[$i][$cur_blk]{active} = FALSE if defined $cur_blk;
         }

         my $fidx = bin_search $addr, 'base', \@{$symbols[$i]};
         if( $fidx > 0 )
         {
            $symbols[$i][$fidx-1]{count}+= $count;
            ( $symbols[$i][$fidx-1]{base} <= $addr && ( $fidx > $#{$symbols[$i]} || $addr < $symbols[$i][$fidx]{base} ) ) or die;
            $info[$i][$cur_blk]{symbol}  = $fidx-1 if defined $cur_blk && !exists $info[$i][$cur_blk]{symbol};
         }
         my $mid = undef;
         if( $fidx > 0 ){ $mid = $symbols[$i][$fidx-1]{module}; }
         else
         {
            $mid = bin_search $addr, 'base', \@lmodules;
            if( $mid > 0 ){ ( $lmodules[$mid-1]{base} <= $addr && ( $mid > $#lmodules || $addr < $lmodules[$mid]{base} ) ) or die; $mid = $lmodules[$mid-1]{mid}; }else{ $mid = undef; }
         }
         next if !@profile || !defined $mid || !exists $modules[$i][$mid]{prf_base};
            $addr+= $modules[$i][$mid]{prf_base} - $modules[$i][$mid]{base}; # to profile address space
         my $bidx = bin_search $addr, 'addr', \@profile;
         if( $bidx > 0 && $profile[$bidx-1]{addr} <= $addr && $profile[$bidx-1]{size} > $addr - $profile[$bidx-1]{addr} )
         {
            $profile[$bidx-1]{active} = FALSE if $active != TRUE; # these BBs don't correlate between Si run and emulation
            $profile[$bidx-1]{hits}  += $count;
            resize_add $profile[$bidx-1]{imixes}, 0, $i, $count;
            $matcheds[$i]            += $count;
            if( $fidx > 0 )
            {
               $profile[$bidx-1]{symbols} = [] if !exists $profile[$bidx-1]{symbols};
               ( (scalar @{$profile[$bidx-1]{symbols}}) <= $i || !defined $profile[$bidx-1]{symbols}[$i] || $profile[$bidx-1]{symbols}[$i] == $fidx-1 ) or die;
               resize_set $profile[$bidx-1]{symbols}, undef, $i, $fidx-1;
            }
         }
      }
      elsif( /^\s*#\s*EMIT_GLOBAL_DYNAMIC_STATS/io )
      {
         $seen_global = TRUE;
      }
      elsif( /^\s*\*total\s+([0-9,]+)/io )
      {
         my $it = $1;$it =~ tr/,//d;my $tt = defined $tscale ? int(0.5+$tscale*$it) : int $it;
         if( $seen_global == TRUE )
         {
            $totals[$i]  = $tt;
            $sub_total   = 0;
            $percent     = 0.0;
            $seen_global = FALSE;
         }
         elsif( $sub_total > 0 )
         {
            next if defined $tid && ( !defined $cur_tid || $cur_tid != $tid );
            $sub_total  = int( $sub_total * 100.0 / $percent );
            $totals[$i]+= int $tt > $sub_total ? int $tt : $sub_total;
            $sub_total  = 0;
            $percent    = 0.0;
         }
      }
   }
   close IMIX;
   $totals[$i]+= int( $sub_total * 100.0 / $percent ) if $sub_total > 0;
}
if( @quals )
{
   my $qsum = 0.0;
   my $qcnt = 0;
   for(@quals){ if( defined $_ ){ $qsum += $_;$qcnt++; } }
   my $qrst = 0.0;
   for(0..$#quals){ if( defined $quals[$_] ){ $quals[$_]/= $qsum; }else{ $quals[$_] = $qrst; } }
}
else
{
   my $qrst = 1.0/(scalar @imixes);
   for(0..$#imixes){ push @quals, $qrst; }
}
my $ign_instrs = 0;
my $ign_cycles = 0;
if(@profile)
{
   for my $p (@profile){if( $p->{active} != TRUE ){
     if( $p->{hits} > 0 )
     {
         $ign_instrs+= $p->{instrs};
         $ign_cycles+= $p->{cycles} if exists $p->{cycles};
     }
     my $ldx = (scalar @{$p->{imixes}})-1;$ldx >= 0 or die;
     for(0..$ldx)
     {
                     $matcheds[$_]-= $p->{imixes}[$_];
                      $ignored[$_]+= $p->{instrs} if $p->{imixes}[$_] > 0;
         my                   $sym = $p->{symbols}[$_];
         $symbols[$_][$sym]{count}-= $p->{imixes}[$_] if defined $sym;
      }
   }}
}
else
{
   for my $i (0..$#info)
   {
       my $cnt = 0;
       for(@{$info[$i]}){if( $_->{active} != TRUE ){
                               $cnt+= $_->{instrs};
          my                   $sym = $_->{symbol};
          $symbols[$i][$sym]{count}-= $_->{instrs} if defined $sym;
       }}
       $matcheds[$i] = $totals[$i]-$cnt;
   }
}
my $gtotal   = 0;for(@totals  ){$gtotal  +=$_;}
my $gmatched = 0;for(@matcheds){$gmatched+=$_;}

sub rdiff
{
   return 2.0*abs($_[0]-$_[1])/($_[0]+$_[1]);
#   return abs($_[0]-$_[1])/($_[0]>=$_[1]?$_[0]:$_[1]);
#   return abs($_[0]-$_[1])/($_[0]<=$_[1]?$_[0]:$_[1]);
}

sub adiff
{
   return abs($_[0]-$_[1]);
}
# keep BBs only covered by the trace
@profile = sort { $b->{hits} <=> $a->{hits} } grep { $_->{hits} > 0 && $_->{active} == TRUE } @profile;

my @weights = ();
my  @scales = ();
my @linstrs = ();
my @lcycles = ();
for(0..$#imixes)
{
   push @weights, 0.0;
   push @linstrs, 0;
   push @lcycles, 0.0;
}

my $prf_instrs = 0;
my $prf_cycles = 0.0;
my $trc_instrs = 0;
my $trc_cycles = 0.0;
for my $p (@profile)
{
   $prf_instrs+= $p->{instrs};
   $prf_cycles+= $p->{cycles} if exists $p->{cycles};
   $trc_instrs+= $p->{hits};
   my     $cpi = exists $p->{cycles} && $p->{instrs} > 0 ? ($p->{cycles}/$p->{instrs})*($ccl_rte/$ctb_rte) : 0.0;
   $trc_cycles+= $p->{hits}*$cpi;

   $p->{ratio} = $p->{instrs}/$p->{hits};
   my    $wsum = 0.0;
   my     $ldx = (scalar @{$p->{imixes}})-1;$ldx >= 0 or die;
   for(0..$ldx)
   {
      my     $w = 0.0;
      my $lhits = $p->{imixes}[$_];
      if( $lhits > 0 )
      {
         #$w    = ( $lhits/$p->{hits} ) * ( $lhits/$totals[$_] ); # under flag
         ##$w    = 0.5 * ( $lhits/$p->{hits} + $lhits/$totals[$_] ); # under flag
         #$w    = ( 1.0 - $lhits/$p->{hits} + $lhits/$totals[$_] ); # under flag
         #$w    = ( $lhits/$totals[$_] ) * ( $gtotal/$p->{hits} ); # under flag
         $w    = $lhits*$quals[$_]/$p->{hits}; # under flag
         $wsum+= $w;

         $linstrs[$_]+= $p->{imixes}[$_];
         $lcycles[$_]+= $p->{imixes}[$_]*$cpi;
      }
      push @{$p->{weights}}, $w;
   }
   for(0..$ldx){ if( $p->{imixes}[$_] > 0 ){ $p->{weights}[$_]/= $wsum;$weights[$_]+= $p->{instrs}*$p->{weights}[$_]; } }
}
if( $prf_instrs > 0 ){ for(0..$#weights){ $weights[$_]/=$prf_instrs; } }

my @ixmap = ();for( my $i = 0; $i < scalar @profile; $i++ ){ push @ixmap, $i; }
   @ixmap = sort { $profile[$a]{ratio} <=> $profile[$b]{ratio} } @ixmap;
my  $ecnt = scalar @ixmap;
my  $fcnt = $ecnt;
my    $i0 = int(    0.25*$fcnt); # floor
my    $p0 = $ecnt > 0 ? $profile[$ixmap[$i0]]{ratio} : 0.0;
my    $i1 = int(0.9+0.75*$fcnt); # ceil
      $i1++ if $i1 == $i0;
      $i1-- if $i1 == $ecnt;
my    $p1 = $ecnt > 0 ? $profile[$ixmap[$i1]]{ratio} : 0.0;
my    $md = 0.0;
my    $mg = 0;
#print "$p0 $p1\n";

my @pivots = ( $p0 );
my @groups = ( [] );
my @deltas = ( [] );
for( my $ostp = 1; $ostp <= $grps_cnt-1; $ostp++ )
{
   my $g0 = -1;
   my $g1 = -1;
   if( $mg >= 0 ) # split $mg into $g0:$p0,$g1:$p1
   {
      $pivots[$mg] = $p0;
      push @pivots, $p1;
      push @groups, [];
      push @deltas, [];
      $g0 = $mg;
      $g1 = $#pivots;
   }
   for( my $istp = 1; $istp <= $grps_ref; $istp++ )
   {
      for( my $j = 0; $j < scalar @pivots; $j++ )
      {
         splice @{$groups[$j]};
         splice @{$deltas[$j]};
      }
      for( my $i = 0; $i < scalar @profile; $i++ )
      {
         my $mj = 0;
         my $md = adiff($pivots[0],$profile[$i]{ratio});
         for( my $j = 1; $j < scalar @pivots; $j++ ){ my $d = adiff($pivots[$j],$profile[$i]{ratio}); if( $d < $md ){ $md = $d; $mj = $j; } }
         push @{$groups[$mj]}, $i;
         push @{$deltas[$mj]}, $md;
      }
      $mg = -1;
      $md = 0.0;
      for( my $j = 0; $j < scalar @groups; $j++ )
      {
         my $ecnt = scalar @{$groups[$j]};
         next if $ecnt == 0;
         my $fcnt = 0;
         if   ( $j == $g0 ){ $fcnt = $ecnt; $g0 = -1; }
         elsif( $j == $g1 ){ $fcnt = $ecnt; $g1 = -1; }
         else
         {
            splice @ixmap;
            for( my $i = 0; $i < $ecnt; $i++ ){ push @ixmap, $i; }
            @ixmap = sort { ${$deltas[$j]}[$a] <=> ${$deltas[$j]}[$b] } @ixmap;

            my $s2 = 0.0;
            for( $fcnt = 0; $fcnt < $ecnt; $fcnt++ )
            {
               my $d = ${$deltas[$j]}[$ixmap[$fcnt]];
                 $s2+= $d*$d;
               next if $fcnt < 2;               # approximation of sigma is good for n >= 3 (bias <= 1.3%)
               my $s = sqrt($s2/($fcnt+1-1.5)); # unbiased sample standard deviation
               last if $d >= 2.0*$s;            # probablity is less than 0.05
            }
         }
         if( $fcnt == $ecnt )
         {
            my   $hits = 0;
            my $instrs = 0;
            for(@{$groups[$j]}){ $hits += $profile[$_]{hits}; $instrs += $profile[$_]{instrs}; }
            $pivots[$j] = $instrs/$hits;
         }
         else
         {
            my   $fhits = 0;
            my $finstrs = 0;
            my    $hits = 0;
            my  $instrs = 0;
            my       $i = 0;
            for(@ixmap)
            {
               my   $k = ${$groups[$j]}[$_];
                 $hits+= $profile[$k]{hits};
               $instrs+= $profile[$k]{instrs};
                    $i++;
               if( $i == $fcnt ){ $fhits = $hits; $finstrs = $instrs; }
            }
            $pivots[$j] = $finstrs/$fhits;
            #my       $d = rdiff($pivots[$j],$instrs/$hits);
            @ixmap = sort { $profile[${$groups[$j]}[$a]]{ratio} <=> $profile[${$groups[$j]}[$b]]{ratio} } @ixmap;
            my $i1 = int(    0.25*$ecnt); # floor
            my $k1 = ${$groups[$j]}[$ixmap[$i1]];
            my $q1 = $profile[$k1]{ratio};
            my $i3 = int(0.9+0.75*$ecnt); # ceil
               $i3-- if $i3 == $ecnt;
            my $k3 = ${$groups[$j]}[$ixmap[$i3]];
            my $q3 = $profile[$k3]{ratio};
            my $d  = 1.0*($q3-$q1)/($q3+$q1); # quartile coefficient of dispersion

            if( $d > $md )
            {
                  $md = $d;
                  $mg = $j;
               #@ixmap = sort { $profile[${$groups[$j]}[$a]]{ratio} <=> $profile[${$groups[$j]}[$b]]{ratio} } @ixmap;
               my $i0 = int(    0.25*$ecnt); # floor
               my $k0 = ${$groups[$j]}[$ixmap[$i0]];
                  $p0 = $profile[$k0]{ratio};
               my $i1 = int(0.9+0.75*$ecnt); # ceil
                  $i1++ if $i1 == $i0;
                  $i1-- if $i1 == $ecnt;
               my $k1 = ${$groups[$j]}[$ixmap[$i1]];
                  $p1 = $profile[$k1]{ratio};
            }
         }
         #print "$pivots[$j] ";
      }
      #print "\n";
   }
}


my   $max_hits = 0;
my $max_instrs = 0;
for my $g (@groups)
{ 
   next if !@{$g};
   my   $hits = 0;
   my $instrs = 0;
   for(@{$g})
   {
        $hits+= $profile[$_]{hits};
      $instrs+= $profile[$_]{instrs};
   }
   my  $ratio = $instrs/$hits;
   for(@{$g}){ $profile[$_]{ninstrs} = $profile[$_]{hits}*$ratio; }
   #my $share  = $hits/$gtotal;
   #my $weight = $instrs/$total_instrs;
   #my $gsize  = scalar @{$g};
   #printf "%.2f%%->%.2f%% (%d@%.5e)   ", 100.0*$share, 100.0*$weight, $gsize, $ratio;
   if( $max_hits < 0.075*$gtotal || $max_instrs < 0.01*$total_instrs || $hits >= 0.075*$gtotal && $instrs >= 0.01*$total_instrs && $ratio < $max_instrs/$max_hits )
   {
        $max_hits = $hits;
      $max_instrs = $instrs;
   }
   #if( $hits > $max_hits )
   #{
   #     $max_hits = $hits;
   #   $max_instrs = $instrs;
   #}
   #$max_hits  += $hits;
   #$max_instrs+= $instrs;
}
#print "\n";

if(0){
my   $max_hits = 0;
my $max_instrs = 0;
for(@profile)
{ 
   if( $_->{hits} > $max_hits )
   {
        $max_hits = $_->{hits};
      $max_instrs = $_->{instrs};
#      print "$_->{addr} $_->{hits} $_->{instrs}\n";
   }
}
}
my $norm = $max_hits > 0 ? $max_instrs/$max_hits : 1.0;
#print "$norm\n";
my @gsymbols = ();
my %gsmap    = ();
for my $i (0..$#symbols)
{
   for(@{$symbols[$i]})
   {
      my ( $vol, $dir, $name ) = File::Spec->splitpath( exists $modules[$i][$_->{module}]{path} ? $modules[$i][$_->{module}]{path} : $modules[$i][$_->{module}]{name} );
      my $gsid = undef;
      if( exists $gsmap{$name}{$_->{name}} ){ $gsid = $gsmap{$name}{$_->{name}}; }
      else
      {
         push @gsymbols, { name=>$_->{name}, base=>$_->{base}, module=>$name, count=>0, instrs=>0, cycles=>0, ninstrs=>0.0, counts=>[] };
         $gsid = $#gsymbols;
         $gsmap{$name}{$_->{name}} = $gsid;
      }
      $gsymbols[$gsid]{count}+= $_->{count};
      resize_add $gsymbols[$gsid]{counts}, 0, $i, $_->{count};
   }
}
for my $p (@profile)
{ 
   if( exists $p->{symbols} )
   {
      my $gsymb_prcd = FALSE;
      for(0..$#{$p->{symbols}})
      {
         my $sid = $p->{symbols}[$_];
         if( defined $sid )
         {
            $symbols[$_][$sid]{instrs} += $p->{instrs};
            $symbols[$_][$sid]{cycles} += $p->{cycles} if exists $p->{cycles};
            $symbols[$_][$sid]{ninstrs}+= $p->{instrs}*$p->{weights}[$_];
            if( $gsymb_prcd == FALSE )
            {
               my $symb = $symbols[$_][$sid]{name};
               my  $mid = $symbols[$_][$sid]{module};
               my ( $vol, $dir, $name ) = File::Spec->splitpath( exists $modules[$_][$mid]{path} ? $modules[$_][$mid]{path} : $modules[$_][$mid]{name} );
               exists $gsmap{$name}{$symb} or die;
               $gsymb_prcd = TRUE;
               my    $gsid = $gsmap{$name}{$symb};
               $gsymbols[$gsid]{instrs}+= $p->{instrs};
               $gsymbols[$gsid]{cycles}+= $p->{cycles} if exists $p->{cycles};
               my $ninstrs = $p->{hits}*$norm;
               if( exists $p->{ninstrs} ){ $ninstrs =     $p->{ninstrs} if $ninstrs > $p->{ninstrs}; }
               else                      { $ninstrs = 1.0*$p->{instrs}  if $ninstrs > $p->{instrs} ; }
               $gsymbols[$gsid]{ninstrs}+= $ninstrs;
            }
         }
      }
   }
}

#my $trc_instrs = 0;
#my $trc_cycles = 0.0;
#for(@gsymbols)
#{
#   if( $_->{count} > 0 && $_->{instrs} > 0 && exists $_->{cycles} )
#   {
#      $trc_instrs += $_->{count};
#      $trc_cycles += $_->{count} * ($_->{cycles}/$_->{instrs}) * ($ccl_rte/$ctb_rte); # global cycles
#   }
#}
#for my $i (0..$#symbols)
#{
#   my $instrs = 0;
#   my $cycles = 0.0;
#   for(@{$symbols[$i]})
#   {
#      if( $_->{count} > 0 && $_->{instrs} > 0 && exists $_->{cycles} )
#      {
#         $instrs += $_->{count};
#         $cycles += $_->{count} * ($_->{cycles}/$_->{instrs}) * ($ccl_rte/$ctb_rte); # local cycles
#      }
#   }
#   resize_set \@lcpis, undef, $i, $cycles/$instrs if $instrs > 0 && $cycles > 0 && ( (scalar @lcpis) <= $i || !defined $lcpis[$i] );
#}
my $grp_instrs = 0;
my $grp_cycles = 0.0;
my @grps_info  = ();
for my $g (@groups)
{ 
   my $gsize  = scalar @{$g};
   next if $gsize == 0;
   my $hits   = 0;
   my $instrs = 0;
   my $cycles = 0;
   for(@{$g})
   {
      $hits  += $profile[$_]{hits};
      $instrs+= $profile[$_]{instrs};
      $cycles+= $profile[$_]{cycles} if exists $profile[$_]{cycles};
   }
   $grp_instrs+= $hits;
   my $cpi     = 0.0;
   if( $cycles > 0 )
   {
      $cpi        = ($cycles/$instrs)*($ccl_rte/$ctb_rte);
      $grp_cycles+= $hits*$cpi;
#      for my $p (@{$g})
#      {
#         my $ldx = (scalar @{$profile[$p]{imixes}})-1;
#         for(0..$ldx)
#         {
#            $linstrs[$_] += $profile[$p]{imixes}[$_];
#            $lcycles[$_] += $profile[$p]{imixes}[$_]*$cpi;
#         }
#      }
   }
   push @grps_info, { size=>$gsize, instrs=>$instrs, hits=>$hits, cpi=>$cpi };
}
if( $trc_instrs == 0 )
{
   $trc_instrs = $grp_instrs;
   $trc_cycles = $grp_cycles;
}
my $wcpi = 0.0;
my $pcpi = $prf_cycles > 0 ? ($prf_cycles/$prf_instrs)*($ccl_rte/$ctb_rte) : 0.0;
if( $weights_rqrd == TRUE )
{
   if( @lcpis ){ for(0..$#weights){                         $lcycles[$_] = $lcpis[$_]*$linstrs[$_]; $wcpi += $weights[$_]*$lcpis[$_];   } }
   else        { for(0..$#weights){ if( $lcycles[$_] > 0 ){ push @lcpis, $lcycles[$_]/$linstrs[$_]; $wcpi += $weights[$_]*$lcpis[$_]; } } }
   if( defined $max_adj )
   {
       if(TRUE)
       {
          #sum( ( wi + k/n ) * ci ) + sum( ( wj - k/p ) * cj ) == pcpi;
          #wcpi + sum(ci)*k/n - sum(cj)*k/p = pcpi
          #(pcpi - wcpi)/(sum(ci)/n-sum(cj)/p) = k
          #
          #sum( wi*k*ci ) + sum( wj*(k-1)*cj ) = pcpi, k > 1
          #k*( sum(wi*ci) + sum(wj*cj) ) = pcpi + sum(wj*cj)
          #k = ( pcpi + sum(wj*cj) ) / ( sum(wi*ci) + sum(wj*cj) )
          #
          #sum( wi*(1+k/sum(wi))*ci ) + sum( wj*(1-k/sum(wj))*cj ) = pcpi
          #wcpi + sum(wi*ci)*k/sum(wi) - sum(wj*cj)*k/sum(wj) = pcpi
          #(pcpi - wcpi)/(sum(wi*ci)/sum(wi)-sum(wj*cj)/sum(wj)) = k
          #
          my $gtsum = 0.0;
          my $gtcnt = 0.0;
          my $gtwsm = 0.0;
          my $gtwts = 0.0;
          my $lesum = 0.0;
          my $lecnt = 0.0;
          my $lewsm = 0.0;
          my $lewts = 0.0;
          for(0..$#lcpis)
          {
             if( $lcpis[$_] > $pcpi ){ $gtsum+= $lcpis[$_];$gtcnt++;$gtwsm+= $lcpis[$_]*$weights[$_];$gtwts+= $weights[$_]; }
             else                    { $lesum+= $lcpis[$_];$lecnt++;$lewsm+= $lcpis[$_]*$weights[$_];$lewts+= $weights[$_]; }
          }
          if( $gtcnt > 0 && $lecnt > 0 )
          {
             #my $k = ( $pcpi - $wcpi ) / ( $gtsum/$gtcnt - $lesum/$lecnt );
             #   $k = ( $k >= 0.0 ? $max_adj : -$max_adj ) if abs($k) > $max_adj;
             #my $gtofs = +$k/$gtcnt;
             #my $leofs = -$k/$lecnt;
             #my @aweights = ();
             #for(0..$#lcpis)
             #{
             #   my $w = $weights[$_] + ( $lcpis[$_] > $pcpi ? $gtofs : $leofs );
             #   last if $w <= 0;
             #   push @aweights, $w;
             #}
             my $k = ( $pcpi - $wcpi ) / ( $lewsm/$lewts - $gtwsm/$gtwts );
                $k = ( $k >= 0.0 ? $max_adj*$gtwts : -$max_adj*$gtwts ) if abs($k) > $max_adj*$gtwts;
                $k = ( $k >= 0.0 ? $max_adj*$lewts : -$max_adj*$lewts ) if abs($k) > $max_adj*$lewts;
             my $gtofs = -$k/$gtwts;
             my $leofs =  $k/$lewts;
             my @aweights = ();
             for(0..$#lcpis)
             {
                my $w = $weights[$_] * ( 1.0 + ( $lcpis[$_] > $pcpi ? $gtofs : $leofs ) );
                last if $w <= 0;
                push @aweights, $w;
             }
             if( (scalar @aweights) == (scalar @weights) )
             { 
                $wcpi = 0.0;
                for(0..$#weights)
                { 
                   push @scales, $aweights[$_]/$weights[$_];
                   $weights[$_] = $aweights[$_];
                   $wcpi += $weights[$_]*$lcpis[$_]; 
                }
             }
          }
       }
       else
       {
          # sum( wi*ci + (pcpi-wcpi)*wi ) = pcpi
          # ai = wi*( ci + (pcpi-wcpi) ) / ci = wi + wi*(pcpi-wcpi)/ci
          # ki = ai/wi = 1 + (pcpi-wcpi)/ci
          my @aweights = ();
          for(0..$#lcpis)
          {
	     my $k = 1.0+($pcpi-$wcpi)/$lcpis[$_];
                $k = ( $k >= 1.0 ? 1.0+$max_adj : 1.0-$max_adj ) if abs($k-1.0) > $max_adj;
             my $w = $weights[$_]*$k;
             push @aweights, $w;
          }
          if( (scalar @aweights) == (scalar @weights) )
          { 
             $wcpi = 0.0;
             for(0..$#weights)
             { 
                push @scales, $aweights[$_]/$weights[$_];
                $weights[$_] = $aweights[$_];
                $wcpi += $weights[$_]*$lcpis[$_]; 
             }
          }
       }
   }
}
my $wtotal  = 0;
my $winstrs = 0;
if( $weights_rqrd == TRUE )
{
   $winstrs = $prf_instrs;
   for my $g (@gsymbols)
   {
      my $count = 0.0;
      my $sum_w = 0.0;
      my $ldx   = (scalar @{$g->{counts}})-1;
      for(0..$ldx){ if( $g->{counts}[$_] > 0 ){ $sum_w+= $weights[$_];$count+= defined $tavg_instrs ? $weights[$_]*$tavg_instrs*$g->{counts}[$_]/$totals[$_] : $g->{counts}[$_]*$weights[$_]; } }
      $g->{wcount}  = $count > 0 && $sum_w > 0.0 ? int(0.5+$count/$sum_w) : 0;
      $g->{winstrs} = $g->{instrs};
      $wtotal      += $g->{wcount};
   }
}
else
{
   $wtotal  = $gtotal;
   $winstrs = $total_instrs;
   for(@gsymbols)
   {
      $_->{wcount}  = $_->{count}; 
      $_->{winstrs} = $_->{ninstrs};
   }
}

if( $blocks_req == FALSE )
{
   my $quality = 0.0;
   my $rating  = 0;
   if( @gsymbols )
   {
      for( sort { $b->{wcount} <=> $a->{wcount} } @gsymbols )
      {
         last if $_->{wcount} == 0;
         printf "%08x %s [%.1f%%] %s %d", $_->{base}, $_->{module}, 100.0*$_->{wcount}/$wtotal, $_->{name}, $_->{wcount};
         if( $_->{instrs} > 0 )
         {
            if( $_->{cycles} > 0 )
            {
               printf " %.0f", 1.0*$_->{wcount}*( 1.0*$_->{cycles}/$_->{instrs} )*( 1.0*$ccl_rte/$ctb_rte );
               printf " %.3f%%", 100.0*$_->{cycles}/$total_cycles;
               print  ' ', $_->{cycles}*$ccl_rte;
            }
            printf " %.3f%%", 100.0*$_->{instrs}/$total_instrs;
            print ' ', $_->{instrs}*$ctb_rte;
            printf " %.5f", 1.0*$_->{winstrs}/$winstrs;
            $quality+= $_->{ninstrs};
         }
         print "\n";
      }
      print "\nTotal  : ";
   }
   print "$wtotal\n";
   print "Matched: ",$gmatched,"\n" if $gmatched > 0 && $weights_rqrd != TRUE;
   print "pInstrs: ",($prf_instrs+$ign_instrs)*$ctb_rte,"\n" if $prf_instrs > 0;
   print "pCycles: ",($prf_cycles+$ign_cycles)*$ccl_rte,"\n" if $prf_cycles > 0;
   my $grps_rqrd = TRUE;
   if( @grps_info )
   {
      for (@grps_info)
      {
         my $ratio  = 1.0*$_->{instrs} / $_->{hits};
         my $share  = 1.0*$_->{hits}   / $gtotal;
         my $weight = 1.0*$_->{instrs} / $total_instrs;
         if( $grps_rqrd == TRUE ){ $grps_rqrd=FALSE;print "\nGroups : "; }else{ print '   '; }
         printf "%.2f%%->%.2f%% (%d:%.5e", 100.0*$share, 100.0*$weight, $_->{size}, $ratio;
         printf ":%.3f", $_->{cpi} if $_->{cpi} > 0.0;
         print ')'
      }
      print "\n" if $grps_rqrd == FALSE;
      printf "gCPI   : %.3f\n", ($grp_cycles/$grp_instrs) if $grp_instrs > 0 && $grp_cycles > 0;
      $rating = 1+int(0.5+9.0*$quality/$prf_instrs) if $quality > 0 && $prf_instrs > 0;
   }
   printf "\ntCPI   : %.3f\n", ($trc_cycles/$trc_instrs) if $trc_instrs > 0 && $trc_cycles > 0;
   printf "pCPI   : %.3f\n", $pcpi if $pcpi > 0.0;
   if( $weights_rqrd == TRUE )
   {
      if( @scales )
      {
         print "Scale  :";
         for(@scales){ printf " %.3f", $_; }
         print "\n";
      }
      printf "Weights: 100.0%%->%.2f%%", 100.0*($prf_instrs+$ign_instrs)/$total_instrs;
      printf "(%.2f%%)", 100.0*($prf_cycles+$ign_cycles)/$total_cycles if $total_cycles > 0;
      print " w";
      for(@weights){ printf " %.5f", $_; }
      print "\n";
      if( $wcpi > 0.0 )
      {
         printf "wCPI   : %.3f", $wcpi;
         printf "(%+.2f%%)", 100.0*($wcpi-$pcpi)/$pcpi if $pcpi > 0.0;
         print ' == ';
         my $sep_rqrd = FALSE;
         for(0..$#weights)
         {
            if( $lcycles[$_] > 0 )
            {
               if( $sep_rqrd == TRUE ){ print ' + '; }else{ $sep_rqrd = TRUE; }
               printf "%.3f * %.5f", $lcpis[$_], $weights[$_];
            }
         }
         printf "\n";
      }
   }
   printf "\nQuality: %.1f%%\n", 100.0*$quality/$total_instrs if $quality > 0;
   print "Rating : $rating\n" if $rating > 0;
}
else
{
   my $i    = 0; # mix imixes?
   print "#1\n";
   my $mdl  = undef;
   my $base = 0;
   for( sort { length $a->{addr} == length $b->{addr} ? $a->{addr} cmp $b->{addr} : length $a->{addr} <=> length $b->{addr} } @{$info[$i]} )
   {
      if( !exists $_->{symbol} )
      {
         print "#:0\n" if !defined $mdl || $mdl ne '';
         $mdl  = '';
         $base = 0;
      }
      else
      {
         my $mid     = $symbols[$i][$_->{symbol}]{module};
         my $cur_mdl = exists $modules[$i][$mid]{path} ? $modules[$i][$mid]{path} : $modules[$i][$mid]{name};
         if( !defined $mdl || $cur_mdl ne $mdl )
         {
            $mdl  = $cur_mdl;
            $base = exists $modules[$i][$mid]{base} ? $modules[$i][$mid]{base} : 0;
            printf "#%s:%x\n", $mdl, $base;
         }
      }
      printf "%x %d %d\n", (hex $_->{addr})-$base, $_->{size}, $_->{instrs};
   }
}

if( $weights_rqrd == TRUE )
{
   for my $i (0..$#imixes)
   {
      next if $imixes[$i] eq '<-';
      my $wname = $imixes[$i];$wname =~ s/mix-out/weight/io;
      my $lquality = 0.0;
      open WGT, '>', $wname or die;
      my $instrs = 0;
      if( @{$symbols[$i]} )
      {
         for( sort { $b->{count} <=> $a->{count} } @{$symbols[$i]} )
         {
            last if $_->{count} == 0;
            printf WGT "%08x %s [%.1f%%] %s %d", $_->{base}, $modules[$i][$_->{module}]{name}, $_->{count} * 100.0 / $totals[$i], $_->{name}, $_->{count};
            if( $_->{instrs} > 0 )
            {
               $instrs += $_->{instrs};
               if( $_->{cycles} > 0 )
               {
                  printf WGT " %.0f", 1.0 * $_->{count} * ( $_->{cycles}/$_->{instrs} ) * ( $ccl_rte/$ctb_rte );
                  printf WGT " %.3f%%", 100.0 * $_->{cycles} / $total_cycles;
                  print  WGT ' ', $_->{cycles} * $ccl_rte;
               }
               printf WGT " %.3f%%", 100.0 * $_->{instrs} / $total_instrs;
               print  WGT ' ', $_->{instrs} * $ctb_rte;
               my $ninstrs = @scales ? $_->{ninstrs} * $scales[$i] : $_->{ninstrs};
               printf WGT " %.5f", $ninstrs / $prf_instrs;
               $lquality += $ninstrs;
            }
            print WGT "\n";
         }
         print WGT "\nTotal  : ";
      }
      print WGT "$totals[$i]\n";
      print WGT "Matched: ",$matcheds[$i],"\n" if $matcheds[$i] > 0;
      print WGT "pInstrs: ",($instrs+$ignored[$i])*$ctb_rte,"\n" if $instrs > 0;
      print WGT "\n";
      printf WGT "tCPI   : %.3f\n", $lcpis[$i] if $lcycles[$i] > 0;
      printf WGT "Weight : %.5f\n", $weights[$i];
      printf WGT "\nQuality: %.1f%%\n", 100.0 * $lquality/$prf_instrs if $lquality > 0;
      close WGT;
   }
}

exit 0;
